library(haven)
library(rdrobust)
library(rddensity)
library(ggplot2)
library(ggeffects)
library(dplyr)
library(scales)
library(stargazer)
library(fastDummies)
library(sjPlot)
library(fixest)
library(sjmisc)
#Load necessary packages

trial<- read_dta("trial_replication.dta") #Load the full data
civilian<-trial[trial$category_e==0,] #The data including only civilian defendants
utils:::print.sessionInfo(sessionInfo()[-8],locale = FALSE)


### Figure 1. The 1956 reform and the severity of sentences------------
a<-civilian %>% 
  mutate(period =cut(timediff,breaks=seq(min(timediff,na.rm=T),max(timediff,na.rm=T),20))) %>% 
  group_by(period) %>% 
  summarise(md2l15 = mean(d2h15, na.rm = T),
            n=n(),
            timediff=median(timediff,na.rm=T)) 
#Aggregate the data into groups, each including civilian defendants tried within 20 days

before_p1 <- 
  trial[trial$category_e==0,] %>% 
  filter(timediff >=-800& timediff < 0&!is.na(timediff)) %>% 
  mutate(weight = 1 - abs(timediff) / 800) #assign triangular weights
fit_before_cov<-lm(d2h15~timediff+age+male+islander+d2_per+
                     weapon+subversion+leakmilitary+presreyearr_l, 
                   data = before_p1, weights = weight)
fit_before_covdata <- ggpredict(fit_before_cov,terms="timediff")
#Fit a linear weighted regression before the cutoff using the original non-aggregated data

after_p1 <- 
  trial[trial$category_e==0,] %>% 
  filter(timediff >0& timediff < 800&!is.na(timediff)) %>% 
  mutate(weight = 1 - abs(timediff) / 800) #assign triangular weights
fit_after_cov<-lm(d2h15~timediff+age+male+islander+d2_per+
                    weapon+subversion+leakmilitary+presreyearr_l, 
                  data = after_p1, weights = weight)
fit_after_covdata <- ggpredict(fit_after_cov,terms="timediff")
#Fit a linear weighted regression after the cutoff using the original non-aggregated data

ggplot(trial,aes(x=timediff, y=d2h15))+
  geom_point(a,alpha = 1/3,mapping=aes(x=timediff, y=md2l15))+
  geom_line(fit_before_covdata,mapping=aes(x, predicted))+
  geom_line(fit_before_covdata,mapping=aes(x, conf.low),linetype = 2,linewidth=0.3)+
  geom_line(fit_before_covdata,mapping=aes(x, conf.high),linetype = 2,linewidth=0.3)+
  geom_line(fit_after_covdata,mapping=aes(x, predicted))+
  geom_line(fit_after_covdata,mapping=aes(x, conf.low),linetype = 2,linewidth=0.3)+
  geom_line(fit_after_covdata,mapping=aes(x, conf.high),linetype = 2,linewidth=0.3)+
  geom_vline(xintercept = 0)+
  scale_x_continuous(name = 'Days from October 1st, 1956',
                     limits = c( -600 , 600 ))+
  scale_y_continuous( name =expression(paste("Probability of sentences", "">="15 years")),limits = c(0, 1.05))+
  theme( panel.grid.major =  element_line(colour = "grey90"), 
         panel.grid.minor =  element_line(colour = "grey90"),legend.position="none",
         panel.background = element_rect(size = 1,colour = "lightgray",fill='white'))
#Plot the fitted regression lines along with the aggregated data points


### Figure 2. The 1956 reform and the severity of sentences------------
d2h15<-civilian$d2h15
timediff<-civilian$timediff
Z<-civilian[,c("age","male","islander","d2_per","weapon","subversion",
               "leakmilitary","presreyearr_l")]
#DV, running variable, and covariates of the RD models

m1<-rdrobust(d2h15, timediff, kernel = "tri",bwselect="cerrd", p = 1, 
             covs=Z,masspoints = "adjust") #linear fit, triangular kernel, CE bw 
m2<-rdrobust(d2h15, timediff, kernel = "tri",bwselect="cerrd", p = 2, 
             covs=Z,masspoints = "adjust") #quadratic fit, triangular kernel, CE bw
m3<-rdrobust(d2h15, timediff, kernel = "uni",bwselect="cerrd", p = 1, 
             covs=Z,masspoints = "adjust") #linear fit, uniform kernel, CE bw 
m4<-rdrobust(d2h15, timediff, kernel = "uni",bwselect="cerrd", p = 2, 
             covs=Z,masspoints = "adjust",bwcheck=280) #quadratic fit, uniform kernel, CE bw
m5<-rdrobust(d2h15, timediff, kernel = "tri",bwselect="mserd", p = 1, 
             covs=Z,masspoints = "adjust") #linear fit, triangular kernel, MSE bw 
m6<-rdrobust(d2h15, timediff, kernel = "tri",bwselect="mserd", p = 2, 
             covs=Z,masspoints = "adjust") #quadratic fit, triangular kernel, MSE bw
m7<-rdrobust(d2h15, timediff, kernel = "uni",bwselect="mserd", p = 1, 
             covs=Z,masspoints = "adjust") #linear fit, uniform kernel, MSE bw 
m8<-rdrobust(d2h15, timediff, kernel = "uni",bwselect="mserd", p = 2, 
             covs=Z,masspoints = "adjust") #quadratic fit, uniform kernel, MSE bw 

r<-NULL
for(i in 1:8){
  r<-rbind(r,cbind(get(paste0("m", i))$coef[3],get(paste0("m", i))$ci[2,1],get(paste0("m", i))$ci[2,2],
                   get(paste0("m", i))$ci[3,1],get(paste0("m", i))$ci[3,2]))
}
r<-data.frame(r)
colnames(r)<-c("resp","bclower","bcupper","rlower","rupper")
r$trt <-c(3,2,1,0,2.9,1.9,0.9,-0.1)
r$bw<-c("CE","CE","CE","CE","MSE","MSE","MSE","MSE")

ggplot(r, aes(resp,trt,color=bw))+
  geom_pointrange(aes(xmin = bclower, xmax = bcupper,shape=bw))+
  geom_linerange(aes(y=trt-0.05,xmin = rlower, xmax = rupper),linetype=3)+
  theme(panel.grid.major =element_blank(),
        panel.grid.minor =element_blank(),
        legend.position="top",legend.direction = "horizontal",
        axis.text.y = element_text(size =12),
        strip.text = element_text(size =12),
        panel.background = element_rect(linewidth = 1,colour = "lightgray",fill='white'))+
  scale_y_continuous("",breaks=c(2.95,1.95,0.95,-0.05),
                     labels=c("Linear fit, triangular kernel",
                              "Quadratic fit, triangular kernel",
                              "Linear fit, uniform kernel",
                              "Quadratic fit, uniform kernel"))+
  scale_color_brewer("Bandwidth",palette = "Dark2")+
  scale_shape_discrete("Bandwidth")+
  geom_vline(xintercept = 0, color = "darkgray")+
  scale_x_continuous("Local average treatment effect")
#Plot the LATE point estimates and 95% CI


### Figure 3. The 1956 reform and the severity of sentences across different types of offenses------------
d2h15<-civilian$d2h15[civilian$d2_maj==1]
timediff<-civilian$timediff[civilian$d2_maj==1]
Z<-civilian[civilian$d2_maj==1,c("age","male","islander","presreyearr_l")]
#binary DV, running variable, and covariates of the RD models for major offenses
m1<-rdrobust(d2h15, timediff, kernel = "tri",bwselect="cerrd", p = 1, 
             covs=Z,masspoints = "adjust") #linear fit, triangular kernel, CE bw
m2<-rdrobust(d2h15, timediff, kernel = "tri",bwselect="mserd", p = 1, 
             covs=Z,masspoints = "adjust") #linear fit, triangular kernel, MSE bw

d2h15<-civilian$d2h15[civilian$d2_mid==1]
timediff<-civilian$timediff[civilian$d2_mid==1]
Z<-civilian[civilian$d2_mid==1,c("age","male","islander","presreyearr_l")]
#binary DV, running variable, and covariates of the RD models for medium offenses
m5<-rdrobust(d2h15, timediff, kernel = "tri",bwselect="cerrd", p = 1, 
             covs=Z,masspoints = "adjust") #linear fit, triangular kernel, CE bw
m6<-rdrobust(d2h15, timediff, kernel = "tri",bwselect="mserd", p = 1, 
             covs=Z,masspoints = "adjust") #linear fit, triangular kernel, MSE bw

d2_termmm<-civilian$d2_termmm[civilian$d2_maj==1]
timediff<-civilian$timediff[civilian$d2_maj==1]
Z<-civilian[civilian$d2_maj==1,c("age","male","islander","presreyearr_l")]
#continuous DV, running variable, and covariates of the RD models for major offenses
m9<-rdrobust(d2_termmm, timediff, kernel = "tri",bwselect="cerrd", p = 1, 
             covs=Z,masspoints = "adjust") #linear fit, triangular kernel, CE bw
m10<-rdrobust(d2_termmm, timediff, kernel = "tri",bwselect="mserd", p = 1, 
              covs=Z,masspoints = "adjust") #linear fit, triangular kernel, MSE bw

d2_termmm<-civilian$d2_termmm[civilian$d2_mid==1]
timediff<-civilian$timediff[civilian$d2_mid==1]
Z<-civilian[civilian$d2_mid==1,c("age","male","islander","presreyearr_l")]
#continuous DV, running variable, and covariates of the RD models for medium offenses
m13<-rdrobust(d2_termmm, timediff, kernel = "tri",bwselect="cerrd", p = 1, 
              covs=Z,masspoints = "adjust") #linear fit, triangular kernel, CE bw
m14<-rdrobust(d2_termmm, timediff, kernel = "tri",bwselect="mserd", p = 1, 
              covs=Z,masspoints = "adjust") #linear fit, triangular kernel, MSE bw

d2_termmm<-civilian$d2_termmm[civilian$d2_819==1]
timediff<-civilian$timediff[civilian$d2_819==1]
Z<-civilian[civilian$d2_819==1,c("age","male","islander","presreyearr_l")]
#continuous DV, running variable, and covariates of the RD models for minor offenses
m17<-rdrobust(d2_termmm, timediff, kernel = "tri",bwselect="cerrd", p = 1, 
              covs=Z,masspoints = "adjust") #linear fit, triangular kernel, CE bw
m18<-rdrobust(d2_termmm, timediff, kernel = "tri",bwselect="mserd", p = 1, 
              covs=Z,masspoints = "adjust") #linear fit, triangular kernel, MSE bw

r<-NULL
for(i in c(1,2,5,6,9,10,13,14,17,18)){
  r<-rbind(r,cbind(get(paste0("m", i))$coef[3],get(paste0("m", i))$ci[2,1],get(paste0("m", i))$ci[2,2],
                   get(paste0("m", i))$ci[3,1],get(paste0("m", i))$ci[3,2]))
}
r<-data.frame(r)
colnames(r)<-c("resp","bclower","bcupper","rlower","rupper")
r$group<-c(rep("DV: more than 15yr",4),rep("DV: sentence in months",6))
r$bw<-rep(c("CE","MSE"),5)
r$trt <-c(3,2.9,2,1.9,3,2.9,2,1.9,1,0.9)
r$group<-factor(r$group, levels=c('DV: more than 15yr',"DV: sentence in months"))

ggplot(r, aes(resp,trt,color=bw))+
  geom_pointrange(aes(xmin = bclower, xmax = bcupper,shape=bw))+
  geom_linerange(aes(y=trt-0.05,xmin = rlower, xmax = rupper),linetype=3)+
  facet_grid(cols = vars(group),scales = "free")+
  theme( panel.grid.major = element_blank(),
         panel.grid.minor = element_blank(),
         legend.position="top",legend.direction = "horizontal",
         axis.text.y = element_text(size =12),
         strip.text = element_text(size =12),
         panel.background = element_rect(size = 1,colour = "lightgray",fill='white'))+
  scale_y_continuous("",
                     breaks=c(2.95,1.95,0.95),
                     labels=c("Major offenses",
                              "Medium offenses",
                              "Minor offenses"))+
  scale_color_brewer("Bandwidth",palette = "Dark2")+
  scale_shape_discrete("Bandwidth")+
  geom_vline(xintercept = 0, color = "darkgray")+
  scale_x_continuous("Local average treatment effect")
#Plot the LATE point estimates and 95% CI